*** Lee Bounds individual sample


* Load the individual-level panel. Among wave-1 rows only those with employer == 0
* are kept; rows from every other wave are kept regardless of employer.
	use "$data\4_individual_ano_vars.dta", clear
	keep if employer == 0 | wave != 1

* Session settings: 90% confidence level and a fixed random-number seed
	set level 90
	set seed 12345

* Lee bounds CI program
*
* Syntax:  leebci lb ub se_lb se_ub
*   lb, ub        lower and upper bound point estimates
*   se_lb, se_ub  their standard errors
*
* Searches a grid of critical values C, in steps of 0.001 between the one-sided
* and the two-sided normal critical value for the current c(level), for the C
* that brings
*     normal(C + (ub-lb)/max(se_lb, se_ub)) - normal(-C)
* closest to the nominal coverage c(level)/100. The interval is then
*     [lb - C*se_lb, ub + C*se_ub].
*
* Results:
*   global scalars cilower, ciupper   (kept for existing callers)
*   r(cilower), r(ciupper), r(cv)     (same limits plus the critical value used)
*
* Exits with return code 198 when the scaled width of the identified set cannot
* be evaluated, e.g. both standard errors are zero or missing, or a bound is missing.
	capture program drop leebci
	program leebci, rclass
		version 11.2
		args lb ub se_lb se_ub

		* Loop-invariant terms. Temporary scalars hold them in full double precision,
		* so the grid search evaluates exactly the same arithmetic on every step.
		tempname gap target
		scalar `gap' = (`ub'-`lb')/max(`se_lb', `se_ub')
		scalar `target' = 1-(100-`c(level)')/100
		if missing(`gap') {
			* Without this check every grid step evaluates to missing, no critical value
			* is ever selected, and the program fails later with an opaque syntax error.
			di as error "leebci: cannot evaluate (ub-lb)/max(se_lb, se_ub); check for missing bounds or zero/missing standard errors"
			exit 198
		}

		* Grid limits: one-sided (cs) and two-sided (ce) normal critical values
		local cs = invnormal(1-(100-`c(level)')/100) 
		local ce = invnormal(1-(100-`c(level)')/200)

		* Keep the grid point with the smallest squared coverage error
		local qd = 10^15
		forvalues cc = `cs'(0.001)`ce' {
			local qdn = ((normal(`cc'+`gap')-normal(-`cc')) - `target')^2
			if `qdn' < `qd' {
				local qd = `qdn'
				local cnn = `cc'
			}
		}

		* Global scalars are what the callers in this file read
		scalar cilower = `lb'-`se_lb'*`cnn'
		scalar ciupper = `ub'+`se_ub'*`cnn'

		* Also expose the results the conventional way for an rclass program
		return scalar cv = `cnn'
		return scalar cilower = scalar(cilower)
		return scalar ciupper = scalar(ciupper)
	end

* Attach firm-level variables. The individual-level assignment is renamed first so
* that the firm file's assignment variable can be brought in under that name.
	rename assignment assignment_ind
	merge m:1 id wave using "$data\2_firm_regressions_individual_paper.dta", keepusing(abidjan assignment empcat strata_all_coll consent) 
	drop if _merge != 3

* Working copy of the merged sample, reloaded by the later sections
	tempfile leedata
	save `leedata'

* Empty container that collects, outcome by outcome, which observations were trimmed.
* It is built inside preserve/restore so the merged sample is back in memory afterwards.
	tempfile trim
	preserve
		clear
		generate id_indiv = .
		generate wave = .
		generate outcome = ""
		generate trim_low = .
		generate trim_high = .
		save `trim'
	restore


* 6-month analysis (wave 2)
*
* For every outcome: re-merge the outcome and its companion controls, run the wave-2
* regression, measure differential selection into that regression by assignment,
* trim the over-represented arm from below and from above, re-estimate on each
* trimmed outcome to obtain the two bounds, compute the confidence interval with
* leebci, post the results, and append the trimmed observations to the trim file.

	* Prep for output
		tempname memhold
		tempfile results
		postfile `memhold' str32 variable double lb ub ci_lb ci_ub using `results'

	* Loop through outcomes
		foreach o of varlist aminwage wrcon sosec empquality_ind lwage satisfied hoursworked training_any {
			preserve
				* Merge required data: the outcome held in memory is replaced by the version stored
				* in the regression file, together with its _2021 and missing_ companion variables
				drop _merge `o'
				merge m:1 id_indiv wave using "$data\4_individual_ano_reg_1.dta", keepusing(`o'_2021 missing_`o'_2021 `o')
				
				* Main regression
				* NOTE(review): the globals controls_balance and se_indiv are defined outside this file;
				* se_indiv presumably carries the absorb and variance options for areg - confirm
				replace status = 1 if status == 4
				areg `o' assignment $controls_balance `o'_2021 missing_`o'_2021 i.wave if wave == 2 $se_indiv
				predict resid_`o', residuals
				gen included_in_reg = e(sample)
				* Person-level flag: 1 if any of the person's rows entered the wave-2 regression
				bys id_indiv: egen select_wave_2 = max(included_in_reg)

				* Calculate trimming parameters
				* Regressing the selection flag on assignment among wave-1 rows gives the selection
				* rate of the assignment == 0 arm (constant) and the gap to the other arm (coefficient),
				* assuming assignment is coded 0/1
				qui reg select_wave_2 assignment if wave == 1
				local attr_diff = abs(_b[assignment])
				* The arm that is selected more often is the one that gets trimmed
				local trim_treat = _b[assignment] > 0
				* Share to trim = gap in selection rates divided by the selection rate of the trimmed arm
				if `trim_treat' == 0 local trim_share = `attr_diff' / _b[_cons]
				if `trim_treat' == 1 local trim_share = `attr_diff' / (_b[_cons] + `attr_diff')
				* Trim among subjects from wave 1, because newly added subjects weren't subject to attrition
				gen wave_1 = 1 if wave == 1
				bys id_indiv: egen from_wave_1 = max(wave_1)
				count if select_wave_2 == 1 & assignment == `trim_treat' & wave == 2 & from_wave_1 == 1
				local trim_count = round(`trim_share' * r(N))
				local trim_remain = r(N) - `trim_count'
				
				* Implement trimming
				gen r = uniform()
				* Rank within wave as well as within selection, assignment and wave-1 status. The trim
				* counts above are wave-2 counts, so rows from other waves must not occupy ranks;
				* without wave in the by-group any non-missing outcome in another wave would shift k.
				* r breaks ties in the outcome at random.
				bysort wave select_wave_2 assignment from_wave_1 (`o' r): gen k = _n
				* Lowest-ranked observations of the trimmed arm removed
				gen `o'_trim_low = `o'
				replace `o'_trim_low = . if select_wave_2 == 1 & assignment == `trim_treat' & from_wave_1 == 1 & k <= `trim_count'
				* Highest-ranked observations of the trimmed arm removed
				gen `o'_trim_high = `o'
				replace `o'_trim_high = . if select_wave_2 == 1 & assignment == `trim_treat' & from_wave_1 == 1 & k > `trim_remain' & !missing(k)
				
				* Calculate bounds
				qui areg `o'_trim_low assignment $controls_balance `o'_2021 missing_`o'_2021 i.wave if wave == 2 $se_indiv
				local b_trim_low = _b[assignment]
				local se_trim_low = _se[assignment]
				
				qui areg `o'_trim_high assignment $controls_balance `o'_2021 missing_`o'_2021 i.wave if wave == 2 $se_indiv
				local b_trim_high = _b[assignment]
				local se_trim_high = _se[assignment]

				* Which trimmed estimate is the lower bound depends on which arm was trimmed,
				* so order the two estimates and carry each standard error with its estimate
				if `b_trim_low' < `b_trim_high' {
					local lb = `b_trim_low'
					local se_lb = `se_trim_low'
					local ub = `b_trim_high'
					local se_ub = `se_trim_high'
				}
				else {
					local lb = `b_trim_high'
					local se_lb = `se_trim_high'
					local ub = `b_trim_low'
					local se_ub = `se_trim_low'
				}

				* Calculate confidence intervals
				* leebci leaves its results in the global scalars cilower and ciupper
				leebci `lb' `ub' `se_lb' `se_ub'
				local ci_lb = scalar(cilower)
				local ci_ub = scalar(ciupper)
				
				* Display for debugging and post to memory
				di "Variable: `o'"
				di "LB: `lb'  SE_LB: `se_lb'"
				di "UB: `ub'  SE_UB: `se_ub'"
				di "CI_LB: `ci_lb'  CI_UB: `ci_ub'"
				di "Posting values:"
				di "`o', `lb', `ub', `ci_lb', `ci_ub'"
				post `memhold' ("`o'") (`lb') (`ub') (`ci_lb') (`ci_ub')

				* Save trimming details
				* Keep only wave-2 rows whose observed outcome was set to missing by either trim,
				* flag which trim removed them, and append them to the running trim file
				keep if !missing(`o') & (`o'_trim_low == . | `o'_trim_high == .) & select_wave_2 == 1 & wave == 2
				gen trim_low = 1 if !missing(`o') & `o'_trim_low == .
				gen trim_high = 1 if !missing(`o') & `o'_trim_high == .
				keep id_indiv wave trim_low trim_high
				gen outcome = "`o'"
				tempfile _trim
				save `_trim'
				use `trim'
				append using `_trim'
				save, replace
			restore
		}

	* Close results file
		postclose `memhold'

	* Load results, inspect, and format
		use `results', clear
		list 
		replace variable="Min. Wage (0/1)" if variable=="aminwage" 
		replace variable="Written Contract (0/1)" if variable=="wrcon" 
		replace variable="Social Security (0/1)" if variable=="sosec"
		replace variable="Formality Index (0-1)" if variable=="empquality_ind"
		replace variable="Wage (Log)" if variable=="lwage" 
		replace variable="Satisfied (0/1)" if variable=="satisfied" 
		replace variable="Hours worked" if variable=="hoursworked" 
		replace variable="Training Part. (0/1)" if variable=="training_any" 
		* String versions with three decimals for the LaTeX table
		gen lb_fmt = string(lb, "%9.3f")
		gen ub_fmt = string(ub, "%9.3f")
		gen ci_lb_fmt = string(ci_lb, "%9.3f")
		gen ci_ub_fmt = string(ci_ub, "%9.3f")

	* Write output file
		texsave variable lb_fmt ub_fmt ci_lb_fmt ci_ub_fmt using "$results\01_tables\Table_S17_bounds_6M.tex", ///
			title("Lee Bounds Results") ///
			replace ///
			footnote("Notes: This table shows Lee bounds estimates.") ///
			headerlines("Variable & Lower Bound & Upper Bound & CI Lower & CI Upper \\") ///
			autonumber ///
			size(normalsize)


* 18-month analysis (wave 3)
*
* Same procedure as the wave-2 section, applied to wave 3: per outcome, run the
* wave-3 regression, measure differential selection by assignment, trim the
* over-represented arm from below and from above, re-estimate to obtain the bounds,
* compute the confidence interval with leebci, post the results, and append the
* trimmed observations to the trim file.

	* Load data
		use `leedata', clear
	
	* Prep for output
		tempname memhold
		tempfile results
		postfile `memhold' str32 variable double lb ub ci_lb ci_ub using `results'

	* Loop through outcomes
		foreach o of varlist aminwage wrcon sosec empquality_ind lwage satisfied hoursworked training_any {
			preserve
				* Merge required data: the outcome held in memory is replaced by the version stored
				* in the regression file, together with its _2021 and missing_ companion variables
				drop _merge `o'
				merge m:1 id_indiv wave using "$data\4_individual_ano_reg_2.dta", keepusing(`o'_2021 missing_`o'_2021 `o')

				* Main regression
				* NOTE(review): the globals controls_balance and se_indiv are defined outside this file;
				* se_indiv presumably carries the absorb and variance options for areg - confirm
				replace status = 1 if status == 4
				areg `o' assignment $controls_balance `o'_2021 missing_`o'_2021 i.wave if wave == 3 $se_indiv
				predict resid_`o', residuals
				gen included_in_reg = e(sample)
				* Person-level flag: 1 if any of the person's rows entered the wave-3 regression
				bys id_indiv: egen select_wave_3 = max(included_in_reg)

				* Calculate trimming parameters
				* Regressing the selection flag on assignment among wave-1 rows gives the selection
				* rate of the assignment == 0 arm (constant) and the gap to the other arm (coefficient),
				* assuming assignment is coded 0/1
				qui reg select_wave_3 assignment if wave == 1
				local attr_diff = abs(_b[assignment])
				* The arm that is selected more often is the one that gets trimmed
				local trim_treat = _b[assignment] > 0
				* Share to trim = gap in selection rates divided by the selection rate of the trimmed arm
				if `trim_treat' == 0 local trim_share = `attr_diff' / _b[_cons]
				if `trim_treat' == 1 local trim_share = `attr_diff' / (_b[_cons] + `attr_diff')
				* Trimming is restricted to subjects who have a wave-1 row
				gen wave_1 = 1 if wave == 1
				bys id_indiv: egen from_wave_1 = max(wave_1)
				count if select_wave_3 == 1 & assignment == `trim_treat' & wave == 3 & from_wave_1 == 1
				local trim_count = round(`trim_share' * r(N))
				local trim_remain = r(N) - `trim_count'

				* Implement trimming
				gen r = uniform()
				* Rank within wave as well as within selection, assignment and wave-1 status. The trim
				* counts above are wave-3 counts, so rows from other waves must not occupy ranks;
				* without wave in the by-group any non-missing outcome in another wave would shift k.
				* r breaks ties in the outcome at random.
				bysort wave select_wave_3 assignment from_wave_1 (`o' r): gen k = _n
				* Lowest-ranked observations of the trimmed arm removed
				gen `o'_trim_low = `o'
				replace `o'_trim_low = . if select_wave_3 == 1 & assignment == `trim_treat' & from_wave_1 == 1 & k <= `trim_count'
				* Highest-ranked observations of the trimmed arm removed
				gen `o'_trim_high = `o'
				replace `o'_trim_high = . if select_wave_3 == 1 & assignment == `trim_treat' & from_wave_1 == 1 & k > `trim_remain' & !missing(k)

				* Calculate bounds
				qui areg `o'_trim_low assignment $controls_balance `o'_2021 missing_`o'_2021 i.wave if wave == 3 $se_indiv
				local b_trim_low = _b[assignment]
				local se_trim_low = _se[assignment]
				
				qui areg `o'_trim_high assignment $controls_balance `o'_2021 missing_`o'_2021 i.wave if wave == 3 $se_indiv
				local b_trim_high = _b[assignment]
				local se_trim_high = _se[assignment]

				* Which trimmed estimate is the lower bound depends on which arm was trimmed,
				* so order the two estimates and carry each standard error with its estimate
				if `b_trim_low' < `b_trim_high' {
					local lb = `b_trim_low'
					local se_lb = `se_trim_low'
					local ub = `b_trim_high'
					local se_ub = `se_trim_high'
				}
				else {
					local lb = `b_trim_high'
					local se_lb = `se_trim_high'
					local ub = `b_trim_low'
					local se_ub = `se_trim_low'
				}

				* Calculate confidence intervals
				* leebci leaves its results in the global scalars cilower and ciupper
				leebci `lb' `ub' `se_lb' `se_ub'
				local ci_lb = scalar(cilower)
				local ci_ub = scalar(ciupper)
				
				* Display for debugging and post to memory
				di "Variable: `o'"
				di "LB: `lb'  SE_LB: `se_lb'"
				di "UB: `ub'  SE_UB: `se_ub'"
				di "CI_LB: `ci_lb'  CI_UB: `ci_ub'"
				di "Posting values:"
				di "`o', `lb', `ub', `ci_lb', `ci_ub'"
				post `memhold' ("`o'") (`lb') (`ub') (`ci_lb') (`ci_ub')

				* Save trimming details
				* Keep only wave-3 rows whose observed outcome was set to missing by either trim,
				* flag which trim removed them, and append them to the running trim file
				keep if !missing(`o') & (`o'_trim_low == . | `o'_trim_high == .) & select_wave_3== 1 & wave == 3
				gen trim_low = 1 if !missing(`o') & `o'_trim_low == .
				gen trim_high = 1 if !missing(`o') & `o'_trim_high == .
				keep id_indiv wave trim_low trim_high
				gen outcome = "`o'"
				tempfile _trim
				save `_trim'
				use `trim'
				append using `_trim'
				save, replace
			restore
		}

	* Close results file
		postclose `memhold'

	* Load results, inspect, and format
		use `results', clear
		list 
		replace variable="Min. Wage (0/1)" if variable=="aminwage" 
		replace variable="Written Contract (0/1)" if variable=="wrcon" 
		replace variable="Social Security (0/1)" if variable=="sosec"
		replace variable="Formality Index (0-1)" if variable=="empquality_ind"
		replace variable="Wage (Log)" if variable=="lwage" 
		replace variable="Satisfied (0/1)" if variable=="satisfied" 
		replace variable="Hours worked" if variable=="hoursworked" 
		replace variable="Training Part. (0/1)" if variable=="training_any" 
		* String versions with three decimals for the LaTeX table
		gen lb_fmt = string(lb, "%9.3f")
		gen ub_fmt = string(ub, "%9.3f")
		gen ci_lb_fmt = string(ci_lb, "%9.3f")
		gen ci_ub_fmt = string(ci_ub, "%9.3f")

	* Write output file
		texsave variable lb_fmt ub_fmt ci_lb_fmt ci_ub_fmt using  "$results\01_tables\Table_S17_bounds_18.tex", ///
			title("Lee Bounds Results") ///
			replace ///
			footnote("Notes: This table shows Lee bounds estimates.") ///
			headerlines("Variable & Lower Bound & Upper Bound & CI Lower & CI Upper \\") ///
			autonumber ///
			size(normalsize)


* Pooled analysis
*
* Re-uses the trimming decisions recorded by the wave-specific sections: for each
* outcome the flagged observations are blanked out in the pooled regression file
* and the regression is re-estimated without an if-restriction on wave.

	* Results are collected through a postfile handle
		tempname poster
		tempfile pooled_out
		postfile `poster' str32 variable double lb ub ci_lb ci_ub using `pooled_out'

	* One pass per outcome
		local outcomes aminwage wrcon sosec empquality_ind lwage satisfied hoursworked training_any
		foreach o of local outcomes {
			* Trimming flags recorded for this outcome only
			use `trim', clear
			drop if outcome != "`o'"
			tempfile flags
			save `flags'

			* Pooled regression data with the flags attached
			use "$data\4_individual_ano_reg_3.dta", clear
			merge 1:1 id_indiv wave using `flags'

			* Untrimmed regression, shown in the log for reference
			areg `o' assignment $controls_balance `o'_2021 missing_`o'_2021 i.wave $se_indiv

			* Trimmed copies of the outcome: flagged observations become missing
			generate `o'_trim_low = cond(trim_low == 1, ., `o')
			generate `o'_trim_high = cond(trim_high == 1, ., `o')

			* Re-estimate on each trimmed copy and keep coefficient and standard error
			foreach side in low high {
				quietly areg `o'_trim_`side' assignment $controls_balance `o'_2021 missing_`o'_2021 i.wave $se_indiv
				local est_`side' = _b[assignment]
				local sterr_`side' = _se[assignment]
			}

			* Start from high-trim as lower bound, then swap when the low-trim estimate is smaller
			local lb = `est_high'
			local se_lb = `sterr_high'
			local ub = `est_low'
			local se_ub = `sterr_low'
			if `est_low' < `est_high' {
				local lb = `est_low'
				local se_lb = `sterr_low'
				local ub = `est_high'
				local se_ub = `sterr_high'
			}

			* Confidence interval; leebci leaves its results in global scalars
			leebci `lb' `ub' `se_lb' `se_ub'
			local ci_lb = scalar(cilower)
			local ci_ub = scalar(ciupper)

			* Echo to the log, then store
			di "Variable: `o'"
			di "LB: `lb'  SE_LB: `se_lb'"
			di "UB: `ub'  SE_UB: `se_ub'"
			di "CI_LB: `ci_lb'  CI_UB: `ci_ub'"
			di "Posting values:"
			di "`o', `lb', `ub', `ci_lb', `ci_ub'"
			post `poster' ("`o'") (`lb') (`ub') (`ci_lb') (`ci_ub')
		}

	* Finish the postfile and bring the results into memory
		postclose `poster'
		use `pooled_out', clear
		list

	* Human-readable row labels
		replace variable = "Min. Wage (0/1)" if variable == "aminwage"
		replace variable = "Written Contract (0/1)" if variable == "wrcon"
		replace variable = "Social Security (0/1)" if variable == "sosec"
		replace variable = "Formality Index (0-1)" if variable == "empquality_ind"
		replace variable = "Wage (Log)" if variable == "lwage"
		replace variable = "Satisfied (0/1)" if variable == "satisfied"
		replace variable = "Hours worked" if variable == "hoursworked"
		replace variable = "Training Part. (0/1)" if variable == "training_any"

	* Three-decimal string versions of every statistic for the table
		foreach stat in lb ub ci_lb ci_ub {
			generate `stat'_fmt = string(`stat', "%9.3f")
		}

	* Export the LaTeX table
		texsave variable lb_fmt ub_fmt ci_lb_fmt ci_ub_fmt using  "$results\01_tables\Table_S17_bounds_pooled.tex", ///
			title("Lee Bounds Results") ///
			replace ///
			footnote("Notes: This table shows Lee bounds estimates.") ///
			headerlines("Variable & Lower Bound & Upper Bound & CI Lower & CI Upper \\") ///
			autonumber ///
			size(normalsize)

* Reset level
* NOTE(review): this sets the confidence level to 95 unconditionally; it does not restore
* whatever level was in effect before this script switched to 90 - confirm 95 is intended.
	set level 95
